HEAD ======= >>>>>>> 91d647234b2ced621689ab67381ceef66910ad66
This is the exploratory analysis for school in session versus break and the impacts on mean road speeds
library(sf)
library(tidyverse)
library(rgeoboundaries)
library(osmextract)
library(tmap)
library(kableExtra)
library(lubridate)
We use free and open speed traffic data from Uber for Nairobi, available here: https://movement.uber.com/cities/nairobi/downloads/speeds?lang=en-US&tp[y]=2019&tp[q]=1
This selection may be missing data from 11/12/2019 - 11/15/2019 This selection may be missing data from 9/5/2019 - 9/6/2019
Uber provides a toolkit software via npm to generate the respective road segments in a geo format. The package is available here: https://www.npmjs.com/package/movement-data-toolkit. We generated the road segments as geojson.
Important: Uber movement data covers the period
2018-2020. Via the attribute osm_way_id the speed traffic
information can be linked to the OpenStreetMap (OSM) road network. It is
not clear how well the ids from the covered period match with the
current OSM data. OSM ids are stable when attributes or the geometry of
a object is changed. Howevery newly added data and deleted objects are
potentially ommitted when using uber traffic information from years ago
but current OSM road network data.
#uber <- read_csv("movement-speeds-quarterly-by-hod-nairobi-2018-Q1.csv")
uber_jan <- read_csv("movement-speeds-hourly-nairobi-2019-1.csv")
uber_april <- read_csv("movement-speeds-hourly-nairobi-2019-4.csv")
nairobi_roads <- st_read("nairobi_2019.geojson")
## Reading layer `nairobi_2019' from data source
## `/Users/charlie/nairobi-uber-access/nairobi_2019.geojson' using driver `GeoJSON'
=======
0.1 1 Intro
This is the exploratory analysis for school in session versus break
and the impacts on mean road speeds
0.2 2.1 Libraries
library(sf)
## Linking to GEOS 3.11.1, GDAL 3.6.2, PROJ 9.1.1; sf_use_s2() is TRUE
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(rgeoboundaries)
library(osmextract)
## Data (c) OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright.
## Check the package website, https://docs.ropensci.org/osmextract/, for more details.
library(tmap)
0.3 2.2 Data download
& process
#uber <- read_csv("movement-speeds-quarterly-by-hod-nairobi-2018-Q1.csv")
uber_feb <- read_csv("movement-speeds-hourly-nairobi-2019-2.csv")
uber_april <- read_csv("movement-speeds-hourly-nairobi-2019-4.csv")
nairobi_roads <- st_read("nairobi_2019.geojson")
## Reading layer `nairobi_2019' from data source
## `/home/mreinmuth/giscience/git/nairobi_uber/nairobi_2019.geojson'
## using driver `GeoJSON'
>>>>>>> 91d647234b2ced621689ab67381ceef66910ad66
## Simple feature collection with 402236 features and 5 fields
## Geometry type: LINESTRING
## Dimension: XY
## Bounding box: xmin: 35.45218 ymin: -8785269 xmax: 17570570 ymax: 0.1982375
## Projected CRS: WGS 84 / Pseudo-Mercator
st_crs(nairobi_roads) <- 4326
<<<<<<< HEAD
holiday <- data.frame(
date=seq(as.Date("2019-02-06"), as.Date("2019-02-26"), by = "day"),
weekday=wday(seq(as.Date("2019-02-06"), as.Date("2019-02-26"), by = "day"), label=T),
day = day(seq(as.Date("2019-02-06"), as.Date("2019-02-26"), by = "day"))) |>
filter(!(weekday %in% c("Sat", "Sun")))
school <- data.frame(date=seq(as.Date("2019-01-02"), as.Date("2019-01-22"), by = "day"), weekday=wday(seq(as.Date("2019-01-02"), as.Date("2019-01-22"), by = "day"), label=T),
day = day(seq(as.Date("2019-02-06"), as.Date("2019-02-26"), by = "day")))|>
filter(!(weekday %in% c("Sat", "Sun")))
school_holiday <- uber_april |>
filter(day %in% holiday$day)
school_insession <- uber_jan |>
filter(day %in% school$day)
uber_ed <- rbind(
school_holiday, school_insession
)
4 Post-processing
uber_ed <- uber_ed |>
group_by(year, month, hour, segment_id, start_junction_id, end_junction_id, osm_way_id, osm_start_node_id, osm_end_node_id) |>
summarise(mean_speed_kph = mean(speed_kph_mean, na.rm = TRUE), .groups = "drop")
uber_ed$month_fact <- factor(uber_ed$month, labels = c("School Semester", "Holiday"))
uber_ed$hour_fact <- as.factor(uber_ed$hour)
nairobi_roads$osmhighway |> table()
##
## living_street motorway motorway_link primary primary_link
## 22 496 306 2998 198
## residential road secondary secondary_link service
## 269488 1186 11576 312 12920
## tertiary tertiary_link trunk trunk_link unclassified
## 15164 146 4014 524 82886
nairobi_roads |> nrow()
## [1] 402236
uber_ed |>
ggplot(aes(x = hour_fact, y = mean_speed_kph, fill = month_fact)) +
geom_boxplot() +
labs(x = "Hour of the Day",
y = "Mean Speed kph",
fill = "Period")

School semester morning and afternoon rush hours appear to
impact overall congestion. The key takeaway from this boxplot
is that we see a positive divergence in mean kph between the school
semester and holiday periods primarily from 6 am to 7 am and from 3 pm
to 4 pm. This suggests that the school semester may be negatively impact
traffic congestion and that further investigation to evaluate these
effects is necessary.
holiday_morn <- school_holiday |>
filter(hour %in% c(6,7)) |>
=======
## Warning: st_crs<- : replacing crs does not reproject data; use st_transform for
## that
holiday_series <- seq(6,27,1)
school_break <- uber_april |>
filter(day %in% holiday_series)
school_insession <- uber_feb |>
filter(day %in% holiday_series)
school_break_mean <- school_break |>
>>>>>>> 91d647234b2ced621689ab67381ceef66910ad66
group_by(hour, segment_id, start_junction_id, end_junction_id,
osm_way_id, osm_start_node_id, osm_end_node_id) |>
summarise(mean_speed_kph = mean(speed_kph_mean, na.rm = TRUE), .groups = "drop")
<<<<<<< HEAD
school_morn <- school_insession |>
filter(hour %in% c(6,7)) |>
group_by(hour, segment_id, start_junction_id, end_junction_id,
osm_way_id, osm_start_node_id, osm_end_node_id) |>
summarise(mean_speed_kph = mean(speed_kph_mean, na.rm = TRUE), .groups = "drop")
holiday_afternoon <- school_holiday |>
filter(hour %in% c(15,16)) |>
group_by(hour, segment_id, start_junction_id, end_junction_id,
osm_way_id, osm_start_node_id, osm_end_node_id) |>
summarise(mean_speed_kph = mean(speed_kph_mean, na.rm = TRUE), .groups = "drop")
school_afternoon <- school_insession |>
filter(hour %in% c(15,16)) |>
group_by(hour, segment_id, start_junction_id, end_junction_id,
osm_way_id, osm_start_node_id, osm_end_node_id) |>
summarise(mean_speed_kph = mean(speed_kph_mean, na.rm = TRUE), .groups = "drop")
# Add an additional column to each dataframe for grouping
school_morn$group <- "School Semester"
holiday_morn$group <- "Holiday"
# Combine the dataframes
combined_df <- rbind(school_morn,
holiday_morn)
mean_vals <- combined_df |>
group_by(group) |>
summarise(value = mean(mean_speed_kph, na.rm = TRUE)) |>
mutate(type = "mean")
median_vals <- combined_df |>
group_by(group) |>
summarise(value = median(mean_speed_kph, na.rm = TRUE)) |>
mutate(type = "median")
statistics_df <- bind_rows(mean_vals, median_vals)
# Plot the histogram
ggplot(combined_df, aes(x = mean_speed_kph, fill = group)) +
geom_histogram(position = "identity", alpha = 0.5, bins = 30) +
geom_vline(data = statistics_df, aes(xintercept = value, color = group, linetype = type),
size = 1) +
theme_minimal() +
labs(x = "Mean Speed (kph)",
y = "Count",
fill = "Group",
color = "Group",
linetype = "Statistic",
title = "Distribution of Mean Speed during School v Holiday Mornings") +
scale_fill_manual(values = c("School Semester" = "dodgerblue3",
"Holiday" = "firebrick1")) +
scale_color_manual(values = c("School Semester" = "dodgerblue3",
"Holiday" = "firebrick1")) +
scale_linetype_manual(values = c("mean" = "solid",
"median" = "dashed")) +
theme(legend.position = "top",
legend.key.size = unit(2, "lines"), # Increase legend key size
legend.text = element_text(size = 12) # Increase legend text size
)

Concentration of slow roads in morning. A histogram
of mean speeds for all road segments show overlapping but distinct
distributions, means, and medians between the school semester and
holiday period during the morning rush hour. The number of road segments
measured for the school semester and holiday period were 16717 and 18356
segments respectively. The school semester had higher numbers of low
speed roads despite the lower total road segments measured.
# Add an additional column to each dataframe for grouping
school_afternoon$group <- "School Semester"
holiday_afternoon$group <- "Holiday"
# Combine the dataframes
combined_df <- rbind(school_afternoon,
holiday_afternoon)
mean_vals <- combined_df |>
group_by(group) |>
summarise(value = mean(mean_speed_kph, na.rm = TRUE)) |>
mutate(type = "mean")
median_vals <- combined_df |>
group_by(group) |>
summarise(value = median(mean_speed_kph, na.rm = TRUE)) |>
mutate(type = "median")
statistics_df <- bind_rows(mean_vals, median_vals)
# Plot the histogram
ggplot(combined_df, aes(x = mean_speed_kph, fill = group)) +
geom_histogram(position = "identity", alpha = 0.5, bins = 30) +
geom_vline(data = statistics_df, aes(xintercept = value, color = group, linetype = type),
size = 1) +
theme_minimal() +
labs(x = "Mean Speed (kph)",
y = "Count",
fill = "Group",
color = "Group",
linetype = "Statistic",
title = "Distribution of Mean Speed during School v Holiday Afternoon") +
scale_fill_manual(values = c("School Semester" = "dodgerblue3",
"Holiday" = "firebrick1")) +
scale_color_manual(values = c("School Semester" = "dodgerblue3",
"Holiday" = "firebrick1")) +
scale_linetype_manual(values = c("mean" = "solid",
"median" = "dashed")) +
theme(legend.position = "top",
legend.key.size = unit(2, "lines"), # Increase legend key size
legend.text = element_text(size = 12) # Increase legend text size
)

Differences less notable in afternoon. A histogram
of mean speeds for all road segments for the afternoon shows less
difference between the two distributions and their summary statistics.
The number of road segments measured for the school semester and holiday
period were 24099 and 27175 segments respectively. The holiday period
had higher numbers of high speed road segments as compared to the school
semester part of this may be do the higher number of roads measured, but
the difference in total road segments measured doesn’t account for the
difference in distribution of low and high speed road segments for the
holiday period.
morn_diff <- holiday_morn |>
left_join(school_morn, by = c("osm_start_node_id", "osm_end_node_id")) |>
mutate(mean_speed_kph = mean_speed_kph.x - mean_speed_kph.y) |>
filter(is.na(mean_speed_kph) == FALSE)
afternoon_diff <- holiday_afternoon |>
left_join(school_afternoon, by = c("osm_start_node_id", "osm_end_node_id")) |>
mutate(mean_speed_kph = mean_speed_kph.x - mean_speed_kph.y) |>
filter(is.na(mean_speed_kph) == FALSE)
morn_diff$period <- "Morning"
afternoon_diff$period <- "Afternoon"
# Combine the dataframes
combined_diff <- rbind(morn_diff,
afternoon_diff)
mean_vals_diff <- combined_diff |>
group_by(period) |>
summarise(value = mean(mean_speed_kph, na.rm = TRUE)) |>
mutate(type = "mean")
median_vals_diff <- combined_diff |>
group_by(period) |>
summarise(value = median(mean_speed_kph, na.rm = TRUE)) |>
mutate(type = "median")
statistics_diff_df <- bind_rows(mean_vals_diff, median_vals_diff)
# Plot the histogram
ggplot(combined_diff, aes(x = mean_speed_kph, fill = period)) +
geom_histogram(position = "identity", alpha = 0.5, bins = 50) +
geom_vline(data = statistics_diff_df, aes(xintercept = value, color = period, linetype = type),
size = 1) +
theme_minimal() +
labs(x = "Difference in Mean Speed (kph)",
y = "Count",
fill = "Period",
color = "Period",
linetype = "Statistic",
title = "Distribution of Mean kph Difference Holiday v School Semester") +
scale_fill_manual(values = c("Morning" = "dodgerblue3",
"Afternoon" = "firebrick1")) +
scale_color_manual(values = c("Morning" = "dodgerblue3",
"Afternoon" = "firebrick1")) +
scale_linetype_manual(values = c("mean" = "solid",
"median" = "dashed")) +
theme(legend.position = "top",
legend.key.size = unit(2, "lines"), # Increase legend key size
legend.text = element_text(size = 12) # Increase legend text size
)

Majority of road segments slower during school
semester Road segments during the school semester versus the
holiday were both slower on average and slower for the majority of roads
during the school semester as compared to the holiday period. The
difference in speeds was more pronounced in the morning than it was in
the afternoon.
summary_fun <- function(df, name) {
df |>
summarise(
mean = mean(mean_speed_kph, na.rm = TRUE),
median = median(mean_speed_kph, na.rm = TRUE),
se = sd(mean_speed_kph, na.rm = TRUE) / sqrt(n()),
.groups = "drop"
) |>
mutate(data_frame = name)
}
summary_df1 <- summary_fun(school_morn, "School Semester Morning")
summary_df2 <- summary_fun(holiday_morn, "Holiday Morning")
summary_df3 <- summary_fun(school_afternoon, "School Semester Afternoon")
summary_df4 <- summary_fun(holiday_afternoon, "Holiday Afternoon")
summary_df5 <- summary_fun(morn_diff, "School v Holiday Morning")
summary_df6 <- summary_fun(afternoon_diff, "School v Holiday Afternoon")
combined_summary <- bind_rows(summary_df1, summary_df2, summary_df3, summary_df4, summary_df5, summary_df6)
pivoted_summary <- combined_summary |>
pivot_longer(-data_frame, names_to = "statistic", values_to = "value") |>
pivot_wider(names_from = "data_frame", values_from = "value")
print(pivoted_summary)
## # A tibble: 3 × 7
## statistic `School Semeste…` `Holiday Morni…` `School Semest…` `Holiday After…`
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 mean 35.5 37.4 31.5 32.5
## 2 median 32.8 35.0 28.3 29.6
## 3 se 0.114 0.109 0.0901 0.0844
## # … with 2 more variables: `School v Holiday Morning` <dbl>,
## # `School v Holiday Afternoon` <dbl>
Differences in speed were signficant across time
periods Based on a quick eye test despite relatively small
differences in speed magnitudally (only a few kph difference) all
differences appear significant due to the high road segment sample sizes
and small standard errors relative to mean differences
morn_diff <- morn_diff |>
left_join(nairobi_roads, by = c("osm_start_node_id" = "osmstartnodeid", "osm_end_node_id" = "osmendnodeid")) |>
st_as_sf() |>
st_drop_geometry() |>
na.omit()
afternoon_diff <- afternoon_diff |>
left_join(nairobi_roads, by = c("osm_start_node_id" = "osmstartnodeid", "osm_end_node_id" = "osmendnodeid")) |>
st_as_sf() |>
st_drop_geometry() |>
na.omit()
morn_cutoff_10 <- quantile(morn_diff$mean_speed_kph, 0.90)
morn_cutoff_20 <- quantile(morn_diff$mean_speed_kph, 0.80)
afternoon_cutoff_10 <- quantile(afternoon_diff$mean_speed_kph, 0.90)
afternoon_cutoff_20 <- quantile(afternoon_diff$mean_speed_kph, 0.80)
# Filter rows where 'value' is greater than the cutoff
morn_top_10 <- morn_diff |>
filter(mean_speed_kph > morn_cutoff_10)
morn_top_20 <- morn_diff |>
filter(mean_speed_kph > morn_cutoff_20)
morn_top10highway <- morn_top_10 |>
group_by(osmhighway) |>
summarise(top10_n = n(), .groups = "drop")
morn_top20highway <- morn_top_20 |>
group_by(osmhighway) |>
summarise(top20_n = n(), .groups = "drop")
afternoon_top_10 <- afternoon_diff |>
filter(mean_speed_kph > afternoon_cutoff_10)
afternoon_top_20 <- afternoon_diff |>
filter(mean_speed_kph > afternoon_cutoff_20)
afternoon_top10highway <- afternoon_top_10 |>
group_by(osmhighway) |>
summarise(top10_n = n(), .groups = "drop")
afternoon_top20highway <- afternoon_top_20 |>
group_by(osmhighway) |>
summarise(top20_n = n(), .groups = "drop")
binom_test_func <- function(k, n, p) {
if(k == 0){
return(1)
} else {
p_val <- binom.test(x = k, n = n, p = p, alternative = "two.sided")$p.value
return(p_val)
}
}
morn_roadcounts <- morn_diff |>
group_by(osmhighway) |>
summarise(n = n(), .groups = "drop")
morn_roadcounts <- morn_roadcounts |>
left_join(morn_top10highway, by = "osmhighway") |>
left_join(morn_top20highway, by = "osmhighway") |>
mutate(expected_n10 = n * 0.1,
percent_top10 = top10_n / n,
expected_n20 = n * 0.2,
percent_top20 = top20_n / n,
p10 = expected_n10 / n,
p20 = expected_n10 / n) |>
mutate_at(c(3,4,6), ~replace_na(.,0))
morn_roadcounts <- morn_roadcounts |>
mutate(
binom_test_pval_top10 = mapply(binom_test_func, top10_n, n, p10),
binom_test_pval_top20 = mapply(binom_test_func, top20_n, n, p20),
rep_top10_morn = if_else(top10_n < expected_n10, "underrepresented", "overrepresented"),
rep_top20_morn = if_else(top20_n < expected_n20, "underrepresented", "overrepresented"))
afternoon_roadcounts <- afternoon_diff |>
group_by(osmhighway) |>
summarise(n = n(), .groups = "drop")
afternoon_roadcounts <- afternoon_roadcounts |>
left_join(afternoon_top10highway, by = "osmhighway") |>
left_join(afternoon_top20highway, by = "osmhighway") |>
mutate(expected_n10 = n * 0.1,
percent_top10 = top10_n / n,
expected_n20 = n * 0.2,
percent_top20 = top20_n / n,
p10 = expected_n10 / n,
p20 = expected_n10 / n) |>
mutate_at(c(3,4,6), ~replace_na(.,0))
afternoon_roadcounts <- afternoon_roadcounts |>
mutate(
binom_test_pval_top10 = mapply(binom_test_func, top10_n, n, p10),
binom_test_pval_top20 = mapply(binom_test_func, top20_n, n, p20),
rep_top10_afternoon = if_else(top10_n < expected_n10, "underrepresented", "overrepresented"),
rep_top20_afternoon = if_else(top20_n < expected_n20, "underrepresented", "overrepresented")
)
combined_tables <- left_join(
select(morn_roadcounts, osmhighway, top10_morn = top10_n, expected10_morn = expected_n10, rep_top10_morn, binom_test_pval_top10_morn = binom_test_pval_top10, top20_morn = top20_n, expected20_morn = expected_n20, rep_top20_morn,
binom_test_pval_top20_morn = binom_test_pval_top20),
select(afternoon_roadcounts, osmhighway, top10_afternoon = top10_n, expected10_afternoon = expected_n10, rep_top10_afternoon, binom_test_pval_top10_afternoon = binom_test_pval_top10, top20_afternoon = top20_n, expected20_afternoon = expected_n20, rep_top20_afternoon,
binom_test_pval_top20_afternoon = binom_test_pval_top20),
by = "osmhighway"
)
combined_tables %>%
kable("html") %>%
kable_styling(bootstrap_options = "striped", full_width = T)
osmhighway
top10_morn
expected10_morn
rep_top10_morn
binom_test_pval_top10_morn
top20_morn
expected20_morn
rep_top20_morn
binom_test_pval_top20_morn
top10_afternoon
expected10_afternoon
rep_top10_afternoon
binom_test_pval_top10_afternoon
top20_afternoon
expected20_afternoon
rep_top20_afternoon
binom_test_pval_top20_afternoon
motorway
58
84.8
underrepresented
0.0016046
103
169.6
underrepresented
0.0393425
32
94.2
underrepresented
0.0000000
64
188.4
underrepresented
0.0007368
motorway_link
15
18.0
underrepresented
0.5347615
29
36.0
underrepresented
0.0121346
12
24.4
underrepresented
0.0053848
23
48.8
underrepresented
0.8315963
primary
278
212.2
overrepresented
0.0000049
511
424.4
overrepresented
0.0000000
227
322.5
underrepresented
0.0000000
428
645.0
underrepresented
0.0000000
primary_link
14
16.5
underrepresented
0.6041855
29
33.0
underrepresented
0.0025739
17
19.2
underrepresented
0.7177397
29
38.4
underrepresented
0.0291952
residential
310
296.3
overrepresented
0.3913876
676
592.6
overrepresented
0.0000000
800
711.6
overrepresented
0.0005841
1577
1423.2
overrepresented
0.0000000
secondary
1105
897.1
overrepresented
0.0000000
1984
1794.2
overrepresented
0.0000000
1413
1220.2
overrepresented
0.0000000
2617
2440.4
overrepresented
0.0000000
secondary_link
7
9.8
underrepresented
0.4981257
18
19.6
underrepresented
0.0104456
8
18.8
underrepresented
0.0069246
20
37.6
underrepresented
0.7159794
service
3
3.0
overrepresented
1.0000000
6
6.0
overrepresented
0.1155813
7
4.1
overrepresented
0.1840965
10
8.2
overrepresented
0.0061063
tertiary
489
706.2
underrepresented
0.0000000
1148
1412.4
underrepresented
0.0000000
885
1005.2
underrepresented
0.0000532
2005
2010.4
underrepresented
0.0000000
tertiary_link
0
1.5
underrepresented
1.0000000
1
3.0
underrepresented
1.0000000
3
3.4
underrepresented
1.0000000
7
6.8
overrepresented
0.0759572
trunk
174
163.9
overrepresented
0.4101359
356
327.8
overrepresented
0.0000000
321
219.2
overrepresented
0.0000000
613
438.4
overrepresented
0.0000000
trunk_link
20
33.1
underrepresented
0.0132803
46
66.2
underrepresented
0.0217809
22
43.2
underrepresented
0.0002895
60
86.4
underrepresented
0.0100172
unclassified
153
183.4
underrepresented
0.0175230
345
366.8
underrepresented
0.0000000
321
381.3
underrepresented
0.0009845
682
762.6
underrepresented
0.0000000
Binomial test shows that or many road types they are
significantly over and underepresented in the top 10 and 20 percentile
of road segments with the highest speed differences between the school
semester and holiday period To clarify the percentiles here are
taking the top 10% and 20% of road segments with the greatest positive
mean difference in speed (experienced faster mean speeds kph) between
the school semester and the holiday period. These are the roads that saw
the greatest reduction in traffic congestion during the holiday period
as compared to the school semester. Secondary and primary roads
were significantly overrepresented and tertiary and
motorways significant underrepresented in the top 10
percentile of road segments with the highest speed differences between
the school semester and holiday period in the morning. In the
top 20 percentile for the morning, trunk roads and residential
became significantly overrepresented while secondary and primary roads
remain significantly over represented. Tertiary and motorways continued
to be significantly under represented in the morning In the
afternoon across both the top 10 and 20 percentiles, motorways,
primary and tertiary roads were significantly underrepresented, while
residential, secondary and trunk roads were significantly
overrepresented.
In conclusion we see varying traffic congestion patterns
across the city in the morning and afternoon, but across time periods,
secondary roads tend to be highly overrepresented while motorways and
tertiary roads tend to be underrepresented.
=======
school_insession_mean <- school_insession |>
group_by(hour, segment_id, start_junction_id, end_junction_id,
osm_way_id, osm_start_node_id, osm_end_node_id) |>
summarise(mean_speed_kph = mean(speed_kph_mean, na.rm = TRUE), .groups = "drop")
school_insession_mean_6am <- school_insession_mean |>
filter(hour == 6)
school_break_mean_6am <- school_break_mean |>
filter(hour == 6)
speed_dif_6am <- school_break_mean_6am |>
left_join(school_insession_mean_6am, by = c("osm_start_node_id", "osm_end_node_id")) |>
mutate(diff_speed_kph = mean_speed_kph.x - mean_speed_kph.y)
school_insession_mean_6am <- school_insession_mean_6am |>
left_join(nairobi_roads, by = c("osm_start_node_id" = "osmstartnodeid", "osm_end_node_id" = "osmendnodeid")) |>
st_as_sf()
school_break_mean_6am <- school_break_mean_6am |>
left_join(nairobi_roads, by = c("osm_start_node_id" = "osmstartnodeid", "osm_end_node_id" = "osmendnodeid")) |>
st_as_sf()
speed_dif_6am <- speed_dif_6am |>
left_join(nairobi_roads, by = c("osm_start_node_id" = "osmstartnodeid", "osm_end_node_id" = "osmendnodeid")) |>
st_as_sf()
0.4 3 ESDA
tmap_mode("view")
## tmap mode set to interactive viewing
tm1 <- tm_shape(school_insession_mean_6am) +
tm_lines("mean_speed_kph")
tm2 <- tm_shape(school_break_mean_6am) +
tm_lines("mean_speed_kph")
tm3 <- tm_shape(speed_dif_6am) +
tm_lines("diff_speed_kph")
tmap_arrange(tm1,tm2,tm3, sync = T)
## Warning: The shape school_insession_mean_6am contains empty units.
## Warning: The shape school_break_mean_6am contains empty units.
## Warning: The shape speed_dif_6am contains empty units.
## Variable(s) "diff_speed_kph" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Warning: The shape school_insession_mean_6am contains empty units.
## Warning: The shape school_break_mean_6am contains empty units.
## Warning: The shape speed_dif_6am contains empty units.
## Variable(s) "diff_speed_kph" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
hist(speed_dif_6am$diff_speed_kph)

>>>>>>> 91d647234b2ced621689ab67381ceef66910ad66